
** Spatial approach, 2: Grid of product space ** 

	**********------------------------------**********
	* Contents: 
	* Part 1: sets up spatial structure of product space
		* Cell width is 3 kwh (s-bin) and 0.5 kg (a-bin)
		* Input is the product-level data from spatial_1_raw_s 
		* Output is spatial_4_sample_*
	* Part 2: collapses data into cells of product space
		* Collapse at the cell-country-year and the cell-country-month level 
		* Output is spatial_2_collapse_s (yearly) and spatial_2_collapse_mc (monthly) 

* ---- Session set-up -------------------------------------------------------
* Close any log left open by an earlier run, empty memory, and switch off
* the -more- pager so the script runs unattended.
capture log close 
clear all
set more off 

* Project root; relative file names below resolve against this folder.
cd "R:\WSV2\TBu_AKe\Spatial_NEW"

* Output folder for the collapsed data sets. -capture- swallows the error
* raised when the folder already exists.
capture mkdir Data
global store "R:\WSV2\TBu_AKe\Spatial_NEW\Data"
* carryforward (SSC) is needed by the clean-up steps in Part 2. -capture-
* keeps the script running when the install attempt fails.
capture ssc install carryforward

log using spatial_2_collapse_s, replace 
pwd // display working directory in log file 

** load data from spatial_1_raw 
* -use- takes the option -clear-; -replace- is not an option of -use- and
* stops the script with an "option replace not allowed" error.
use spatial_1_raw_s, clear  // slow when loaded from server


*************************************
** Part 1: Product Space Set-up *****
*************************************

  ** 1: generate cell boundaries
  * Every cell is a parallelogram in (a, E) space: 0.5 wide in a and D high
  * in E, with its lower and upper edges running parallel to the compliance
  * line E = kappa + sigma * a. The four e_* variables hold the E-values of
  * the four corners.

scalar kappa = 51.7*0.59   // intercept at compliance line 
scalar sigma = 47*0.59     // attribute slope at compliance line 
scalar D     = 3           // interval (cell height)

	** horizontal extent of the cell: a_bin is the midpoint 
generate a_min = a_bin - 0.25
label variable a_min "a_i left"
generate a_max = a_bin + 0.25
label variable a_max "a_i right"

	** vertical offset: s_bin shifted by the intercept of the compliance line 
generate k_bin = s_bin + `=kappa'
label variable k_bin "kappa (std. E)"

	** corner points; the right edge sits 0.5*sigma above the left edge 
generate e_low_left  = k_bin + a_min*`=sigma'
generate e_up_left   = e_low_left + `=D'
generate e_low_right = e_low_left + 0.5*`=sigma'
generate e_up_right  = e_low_left + `=D' + 0.5*`=sigma'

	** test a cell: for a_bin 6 and s_bin 0 the formulas above give 
	** 189.95 and 192.95 at the left edge, 203.815 and 206.815 at the right 
summarize e_low* e_up* if a_bin == 6 & s_bin == 0

	** e_bin is the lower-left corner, kept under its own name as safety net 
generate e_bin = e_low_left 
label variable e_bin "E_i low-left"

*****************************************************
	** 2a: fixes needed to get loops to work **
	* Remove rows without a cell address (a_bin or e_bin equal to .) and
	* rows with s_bin below -252 (censoring at lowest value).
drop if a_bin == . | e_bin == . | s_bin < -252

	* Grid-expansion flags: a missing flag is recoded to 1.
foreach flag of varlist added_a added_s {
	replace `flag' = 1 if `flag' == .
}

	* Cell identifier: one integer per (e_bin, a_bin) pair, i.e. rectangles
	* with 3 kwh and 0.5 kg.
egen cell = group(e_bin a_bin)

	* Checkpoints. Counts noted by the original author:
	* cell 4950 (198*25), e_bin 4950, a_bin 25.
codebook cell e_bin a_bin
*scatter s_bin a_bin, msize(small) msymbol(Dh) // checkpoint: grid is regular. 

	
******** define segments in product space ***********
* Three mutually exclusive 0/1 indicators derived from eei_bin:
*   r (restricted)   : eei_bin >= 59
*   b (bunching)     : 57 <= eei_bin < 59   (2-unit eei window)
*   u (unrestricted) : eei_bin < 57
* Stata orders missing values above every number, so an unguarded
* "eei_bin >= 59" is also true when eei_bin is missing. The !missing()
* guard keeps rows without an eei_bin at r = b = u = 0 instead of counting
* them as restricted.

gen r = 0 // restricted
gen b = 0 // bunching 
gen u = 0 // unrestricted

replace r = 1 if eei_bin >= 59 & !missing(eei_bin)
replace b = 1 if eei_bin < 59 & eei_bin >= 57 // 2-unit eei window : 69k obs. 
replace u = 1 if eei_bin < 57 	  

*********************************************
	** 2b: outcome by cell: country-specific * 	
	* Sales totals and product counts per cell and country (ccode), over all
	* years and per year, plus per-year versions restricted to the r and b
	* segments. Each result is written to every row of its group.
	
		************
		** yearly **
		************		
	** cell-level sales, country specific
bysort cell ccode: egen cell_sales = sum(units)            // all years 
bysort cell year ccode: egen cell_year_sales = sum(units)  // by year 

	** cell-level product count 
	* n tags the first row of each id within the group. Rows belonging to
	* added cells (added_a or added_s equal to 1) are never tagged, so the
	* group sum of n counts ids on non-added rows.
bysort cell year ccode id: generate n = (_n == 1) & added_a != 1 & added_s != 1
bysort cell year ccode: egen cell_year_count = sum(n)      // by year 
drop n

bysort cell ccode id: generate n = (_n == 1) & added_a != 1 & added_s != 1
bysort cell ccode: egen cell_count = sum(n)                // all years 
drop n


	** cell-level sales in R and B  
	* The if-qualifier leaves the total missing on rows outside the segment.
bysort cell year ccode: egen cell_year_sales_R = sum(units) if r == 1  // by year 
bysort cell year ccode: egen cell_year_sales_B = sum(units) if b == 1  // by year 

	** cell-level product count in R and B 
	* Same tagging as above, with the tag also removed outside the segment.
bysort cell year ccode id: generate n = (_n == 1) & added_a != 1 & added_s != 1 & r != 0
bysort cell year ccode: egen cell_year_count_R = sum(n)    // by year 
drop n

bysort cell year ccode id: generate n = (_n == 1) & added_a != 1 & added_s != 1 & b != 0
bysort cell year ccode: egen cell_year_count_B = sum(n)    // by year 
drop n

summarize cell_* // outcomes at cell-level 

	* Re-check the test cell now that the cell id exists.
summarize cell e_low* e_up* if a_bin == 6 & s_bin == 0 // 189, 192 at left - 203, 206 at right 

	**************	
	** monthly ***
	**************
	* Same outcomes as the yearly ones, grouped by date instead of year.

	** cell-level sales, country specific
bysort cell date ccode: egen cell_date_sales = sum(units)  // by date 

	** cell-level product count 
	* Tag the first row of each id within cell, date and country; rows of
	* added cells are never tagged. The group sum of the tag is the count.
bysort cell date ccode id: generate n = (_n == 1) & added_a != 1 & added_s != 1
bysort cell date ccode: egen cell_date_count = sum(n)      // by date 
drop n

summarize cell_* // looks ok. 


*** replace missings for cells added in grid expansion 
* Rows flagged as added cells get placeholder values: no price, a fixed
* year and date, and zero for every outcome.

* NOTE(review): price_all is blanked for added_a only, while every other
* statement in this section treats added_a and added_s alike - confirm
* whether added_s rows should be blanked as well.
replace price_all = . if added_a == 1

* 2017 is partial, at risk of being dropped. 683 equals ym(2016, 12),
* assuming date is a Stata monthly date - TODO confirm.
replace year = 2017 if added_a == 1 | added_s == 1
replace date = 683  if added_a == 1 | added_s == 1

foreach outcome of varlist units cell_year_sales cell_year_count cell_date_sales cell_date_count {
	replace `outcome' = 0 if added_a == 1 | added_s == 1
}
	
*save spatial_2_collapse_s0, replace // intermediate save to explore raw data before collapse 


******************************************************
	** 3: save and collapse at id-country-level 
	* Writes two product-level samples into the store folder:
	*   spatial_4_sample_s  : one row per id, ccode and year
	*   spatial_4_sample_mc : one row per id, ccode and date
	* Both collapses run inside preserve and restore, so the product-level
	* data in memory is unchanged afterwards.
cd "$store"

	* price2 copies price_all so that the collapses can take its mean while
	* price_all itself is summed. It is created outside preserve because the
	* Part 2 collapses further down use it too.
gen price2 = price_all

    ** collapse #1a: by id year for descriptives  
	* The former "gen units2 = units" was removed here and in #1b: units2
	* was not named in the collapse and was therefore discarded by it.
preserve
sort year month cell id
collapse (firstnm) id_c cell e_* a_* k_bin eei_bin s_bin s capacity kwh eei class_A wm_labelbin revs_min_2 type_wm_2 ///
		 (max) cell_year_sales cell_year_count sum_countries ///
		 (sum) units_clean units price_all b r u ///
		 (mean) price2 price price_eur age_country age_xc age_country_reverse, ///
		 by(id ccode year)
	** save, for merging later 
save spatial_4_sample_s, replace 
restore 

    ** collapse #1b: by id date for estimation sample 
preserve
sort year month cell id
collapse (firstnm) id_c cell e_* a_* k_bin eei_bin s_bin s capacity kwh eei class_A wm_labelbin revs_min_2 type_wm_2 ///
		 (max) cell_date_sales cell_date_count cell_year_sales cell_year_count sum_countries ///
		 (sum) units_clean units price_all b r u ///
		 (mean) price2 price price_eur age_country age_xc age_country_reverse, ///
		 by(id ccode date)
	** save, for merging later 
save spatial_4_sample_mc, replace 
restore 


*******************************************
** Part 2: prepare W_matrix aggregation  **
*******************************************

	** collapse #2: by cell year for W_matrix 
	* Builds the yearly cell panel: collapse to cell, country and year,
	* rectangularise with fillin, repair the cell address on filled rows,
	* tag segments by s_bin and renumber cells within year and country.

		*** yearly *** 
preserve 
sort cell ccode year // stays the same!!! 

	* Rows without a country code are pooled under ccode 0.
replace ccode = 0 if missing(ccode)
collapse (max) cell_year_sales cell_year_count (min) eei_bin2 ///
		 (sum) units price_all b r u ///
		 (mean) e_* a_* k_bin s_bin eei_bin capacity kwh s price2 eei ///
		 (firstnm) added_*, ///
		 by(cell ccode year)
order cell_year*, last 
order price_all price2, last 
order capacity kwh units added_*, last


******************
*** clean up data 
*****************

fillin cell ccode year // fillin to expand grid structure  
codebook cell //  
label variable _fillin "cell-year_fill" // 
	* Rows created by fillin have missing flags; they are tagged as added.
	* NOTE(review): added_s stays missing on those rows - confirm intended.
replace added_a = 1 if added_a == . // identifier for added cells 1   

xtset cell // impose cell structure 

*** replace missings with zero for outcome, set price missing if zero
	* Outcomes are zero when missing, and also on rows that were already
	* flagged added_a before fillin (from collapse structure).
foreach outcome of varlist cell_year_count cell_year_sales units {
	replace `outcome' = 0 if `outcome' == . | (added_a == 1 & _fillin == 0)
}

foreach p of varlist price_all price2 {
	replace `p' = . if `p' == 0  
}

*** fix missings in cell address (happens with collapse)
	* Within each cell, sort the variable ascending so that missing values
	* come last, then fill them from the last non-missing value above.
	* First four variables: capacity a. Remaining ones: energy consumption e.
foreach addr of varlist a_min a_max a_bin eei_bin e_bin e_low_left e_low_right e_up_left e_up_right k_bin s_bin {
	bysort cell (`addr'): carryforward `addr', replace
}

*gen eei_digit = ((e_bin)/(47*a_bin+51.7))*100 // check on aggregation structure   
*replace eei = round(eei_digit, 0.1) //  


******** define B and R by s-bin ************
** tag bunching cells by distance to compliance line 
	* restricted: s_bin >= 0 (incompliant)
	* bunching  : s_bin equal to -3 or -6 (first and second row in compliance)
generate restricted = (s_bin >= 0)
generate bunching = inlist(s_bin, -3, -6)

	* space: 2 for bunching, 1 for restricted, 0 for everything else
generate space = cond(bunching == 1, 2, cond(restricted == 1, 1, 0))

********************************************
	** order cells by segment 
	* The original id is kept as cell0. The new cell is the running number
	* within year and country after sorting by descending space, so bunching
	* cells come first, then restricted, then the rest.

rename cell cell0 
gsort +year +ccode -space +k_bin +e_bin // sort order is critical 
by year ccode: generate cell = _n

**************************
save spatial_2_collapse_s, replace 
****************************

restore
cd "$store" 
		*** monthly  *** 
	* Builds the monthly cell panel with the same steps as the yearly one:
	* collapse to cell, country and date, rectangularise over year and month,
	* repair the cell address, rebuild the date, tag segments by s_bin and
	* renumber cells within year, month and country.
preserve 
sort cell ccode year month // sort order is critical  
*gen price2 = price_all
	* Rows without a country code are pooled under ccode 0.
replace ccode = 0 if missing(ccode)
collapse (max) cell_date_sales cell_date_count (min) eei_bin2 ///
		 (sum) units price_all b r u ///
		 (mean) e_* a_* k_bin s_bin eei_bin capacity kwh s price2 eei ///
		 (firstnm) added_* year month, ///
		 by(cell ccode date)
order cell_date*, last 
order price_all price2, last 
order capacity kwh units added_*, last


******************
*** clean up data 
*****************

fillin cell ccode year month // sort order 
codebook cell //  4950 unique cells 
label variable _fillin "cell-month_fill" // 
	* Rows created by fillin have missing flags; they are tagged as added.
	* NOTE(review): added_s stays missing on those rows - confirm intended.
replace added_a = 1 if added_a == . // tag added cells   

xtset cell // impose spatial structure 

	* Outcomes are zero when missing, and also on rows that were already
	* flagged added_a before fillin (from collapse structure).
foreach outcome of varlist cell_date_count cell_date_sales units {
	replace `outcome' = 0 if `outcome' == . | (added_a == 1 & _fillin == 0)
}

	* A zero price is treated as no price.
foreach p of varlist price_all price2 {
	replace `p' = . if `p' == 0  
}

*** fix missings in cell address (happens with collapse)
	* Within each cell, sort the variable ascending so that missing values
	* come last, then fill them from the last non-missing value above.
	* First four variables: capacity a. Remaining ones: energy consumption e.
foreach addr of varlist a_min a_max a_bin eei_bin e_bin e_low_left e_low_right e_up_left e_up_right k_bin s_bin {
	bysort cell (`addr'): carryforward `addr', replace
}


	* Rows created by fillin have no date, so the date is rebuilt from year
	* and month. The collapsed date is kept as date_cal.
rename date date_cal  // date format and fillin creates error 
	* Month 12 is assigned to the previous calendar year.
	* NOTE(review): presumably year counts December towards the following
	* year in this data - confirm against spatial_1_raw_s.
generate year_cal = cond(month == 12, year - 1, year)

sort year_cal month
generate date = ym(year_cal, month) // date variable out of year and month
format date %tm 

*gen e_max_digit = 0 		
*bysort eei_bin a_bin: replace e_max_digit = ((eei_bin+1)/100)*(47*a_bin+51.7)

*gen eei_digit = ((e_bin)/(47*a_bin+51.7))*100 // used in robustness check for rounding  
*replace eei = round(eei_digit, 0.1) // 

******** define B and R by s-bin ************
	* restricted: s_bin >= 0 (incompliant)
	* bunching  : s_bin equal to -3 or -6 (first and second row in compliance)
generate restricted = (s_bin >= 0)
generate bunching = inlist(s_bin, -3, -6)

	* space: 2 for bunching, 1 for restricted, 0 for everything else
generate space = cond(bunching == 1, 2, cond(restricted == 1, 1, 0))

	* The original id is kept as cell0. The new cell is the running number
	* within year, month and country after sorting by descending space, so
	* bunching cells come first, then restricted, then the rest.
rename cell cell0 
gsort +year +month +ccode -space +k_bin +e_bin // sort order is critical 
by year month ccode: generate cell = _n

**************************
save spatial_2_collapse_mc, replace 
****************************

restore 

log close

clear 

exit 